Erythromycin resistance
- Erythromycin resistance in CC22 is conferred by a 2473 bp plasmid encoding repL and ermC
NC_018969.txt from https://www.ncbi.nlm.nih.gov/pubmed/20943864/
- First, let's load the overall genome coverage statistics for each isolate
# Genome coverage statistics, read from a tab-separated file.
# readr's column-type message is suppressed.
coverages.NC_017763 <- read_tsv(
  file = "stats.coverage.NC_017763.txt",
  show_col_types = FALSE
)
- Next, let's load the read depths for each isolate mapped to the reference plasmid
# Path of the tab-separated read-depth table for the reference plasmid.
depths <- "NC_018969.tsv"

# Read the table (column-type message suppressed), then display it.
data <- read_tsv(depths, show_col_types = FALSE)
data
- This data file contains the number of reads mapping at each genome position (POS) of the reference (NC_018969.1). The columns are labelled by the name of the alignment file (*.bam)
- Let's rename the first column, as it begins with a # which can cause problems when specifying columns
# Preview the table with the "#CHROM" column renamed to `name`.
# The result is only displayed, not stored.
rename(data, name = "#CHROM")
- Let's also pivot the data to produce the long form of the table
# Preview the long form of the table: every column except `name` and `POS`
# is stacked into an `isolate` (former column name) / `depth` (cell value) pair.
# The result is only displayed, not stored.
data %>%
  rename(name = "#CHROM") %>%
  pivot_longer(
    cols = -c(name, POS),
    names_to = "isolate",
    values_to = "depth"
  )
- And we need to extract the isolate name from the alignment file paths; we can do this with a combination of the
mutate() and str_replace() functions
- we can also save the data into a variable called
long_data
# Build the long-format depth table (`name`, `POS`, `isolate`, `depth`) that
# the plotting chunks read from.
long_data <- data %>%
  # "#CHROM" begins with "#", which is awkward to reference; call it `name`.
  rename(name = "#CHROM") %>%
  # Stack one column per alignment file into isolate/depth pairs.
  pivot_longer(
    cols = -c(name, POS),
    names_to = "isolate",
    values_to = "depth"
  ) %>%
  # Reduce each alignment path to the bare isolate name. The "." before "bam"
  # is escaped so that it matches a literal dot rather than any character.
  mutate(
    isolate = str_replace(
      isolate,
      "results/alignments/NC_018969_(.+)\\.bam",
      "\\1"
    )
  )
long_data
- In order to plot for a single isolate we need to first filter the data, then pass the data into
ggplot(). Here we plot the plasmid position (POS) against the number of reads covering that position (depth) using geom_line() setting the line width to 2
# Depth along the plasmid for isolate SEQ045 only, drawn as a thick line.
ggplot(
  data = filter(long_data, isolate == "SEQ045"),
  mapping = aes(x = POS, y = depth)
) +
  geom_line(size = 2)

- Let's add two arrows to represent the positions of the genes on the plasmid; we just provide x,y coords and some style attributes
# Depth profile for isolate SEQ045 with two arrows marking gene positions
# (blue: x 1-477, red: x 1292-2026, both at y = 1500).
# The arrows are drawn with annotate() rather than geom_segment(aes(...)):
# a geom layer with constant aesthetics inherits the plot data and redraws the
# identical segment once per data row, whereas annotate() draws it once.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# thickness; `size` is kept here because the installed version is unknown.
long_data %>%
  filter(isolate == "SEQ045") %>%
  ggplot(aes(x = POS, y = depth, group = isolate)) +
  geom_line(size = 2) +
  annotate(
    "segment",
    x = 1, xend = 477, y = 1500, yend = 1500,
    size = 3, colour = "blue",
    arrow = arrow(length = unit(0.4, "cm"))
  ) +
  annotate(
    "segment",
    x = 1292, xend = 2026, y = 1500, yend = 1500,
    size = 3, colour = "red",
    arrow = arrow(length = unit(0.4, "cm"))
  )

- Finally we can change the axes labels with
labs() and set the theme to theme_bw()
# Depth profile for isolate SEQ045 with axis labels, gene arrows and the
# black-and-white theme.
# The arrows use annotate() so each is drawn once instead of once per data row
# (which is what geom_segment() with constant aesthetics does).
# NOTE(review): the x-axis label mentions "LlaG1", but POS is the position on
# the NC_018969 plasmid -- confirm the intended label.
long_data %>%
  filter(isolate == "SEQ045") %>%
  ggplot(aes(x = POS, y = depth, group = isolate)) +
  geom_line(size = 2) +
  labs(x = "LlaG1 gene position", y = "Sequence depth") +
  annotate(
    "segment",
    x = 1, xend = 477, y = 1500, yend = 1500,
    size = 3, colour = "blue",
    arrow = arrow(length = unit(0.4, "cm"))
  ) +
  annotate(
    "segment",
    x = 1292, xend = 2026, y = 1500, yend = 1500,
    size = 3, colour = "red",
    arrow = arrow(length = unit(0.4, "cm"))
  ) +
  theme_bw()

- Note that it is sometimes useful to plot the y axis with a log scale, using the function
scale_y_log10()
# Same SEQ045 depth profile, but with a log10 y axis.
# The arrows use annotate() so each is drawn once instead of once per data row
# (which is what geom_segment() with constant aesthetics does).
# NOTE: positions with a depth of 0 have no finite value on a log10 axis.
# NOTE(review): the x-axis label mentions "LlaG1", but POS is the position on
# the NC_018969 plasmid -- confirm the intended label.
long_data %>%
  filter(isolate == "SEQ045") %>%
  ggplot(aes(x = POS, y = depth, group = isolate)) +
  geom_line(size = 2) +
  scale_y_log10() +
  labs(x = "LlaG1 gene position", y = "Sequence depth") +
  annotate(
    "segment",
    x = 1, xend = 477, y = 1500, yend = 1500,
    size = 3, colour = "blue",
    arrow = arrow(length = unit(0.4, "cm"))
  ) +
  annotate(
    "segment",
    x = 1292, xend = 2026, y = 1500, yend = 1500,
    size = 3, colour = "red",
    arrow = arrow(length = unit(0.4, "cm"))
  ) +
  theme_bw()

- If we want to plot the same graph for all isolates we can use
facet_wrap(). Note the removal of the filter() function and the inclusion of group=isolate in the ggplot() function; then we apply facet_wrap(~isolate) to complete the process
# Depth profiles for every isolate, one facet panel per isolate.
# Layers, in drawing order: a red baseline at y = 0, the depth line, the two
# gene arrows at y = 5000, and a dashed green line at each `coverage` value
# from coverages.NC_017763.
# The arrows use annotate() so each is drawn once per panel instead of once
# per data row (which is what geom_segment() with constant aesthetics does).
# NOTE(review): the coverage line is per-panel only if coverages.NC_017763 has
# an `isolate` column matching long_data; otherwise every coverage value is
# drawn in every panel -- confirm against the file.
# NOTE(review): the x-axis label mentions "LlaG1", but POS is the position on
# the NC_018969 plasmid -- confirm the intended label.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# thickness; `size` is kept here because the installed version is unknown.
long_data %>%
  ggplot(aes(x = POS, y = depth, group = isolate)) +
  geom_hline(yintercept = 0, color = "red", size = 2) +
  geom_line(size = 2) +
  # scale_y_log10() +
  labs(x = "LlaG1 gene position", y = "Sequence depth") +
  annotate(
    "segment",
    x = 1, xend = 477, y = 5000, yend = 5000,
    size = 3, colour = "blue",
    arrow = arrow(length = unit(0.4, "cm"))
  ) +
  annotate(
    "segment",
    x = 1292, xend = 2026, y = 5000, yend = 5000,
    size = 3, colour = "red",
    arrow = arrow(length = unit(0.4, "cm"))
  ) +
  geom_hline(
    data = coverages.NC_017763,
    aes(yintercept = coverage),
    linetype = "dashed", color = "darkgreen", size = 1
  ) +
  theme_bw() +
  facet_wrap(~isolate)

- We can save that final figure to a file with
ggsave()
# Write the most recently displayed plot to a PNG file, 20 x 20 in ggsave()'s
# default units.
ggsave(
  filename = "NC_018969-mapping.png",
  width = 20,
  height = 20
)
LS0tCnRpdGxlOiAiVXNpbmcgZ2dwbG90IGZhY2V0aW5nIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgaGlnaGxpZ2h0OiB6ZW5idXJuCiAgICBudW1iZXJfc2VjdGlvbnM6IG5vCiAgICB0aGVtZTogY2VydWxlYW4KICAgIHRvYzogeWVzCiAgICB0b2NfZGVwdGg6IDMKICAgIHRvY19mbG9hdDogeWVzCiAgICBkZl9wcmludDogcGFnZWQKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCioqX1BsZWFzZSBmb2xsb3cgdGhlIGluc3RydWN0aW9ucyBpbiB0aGUgUkVBRE1FIGZpbGUgYmVmb3JlIHN0YXJ0aW5nIHRoaXMgdHV0b3JpYWwuXyoqCgo8aHIgc3R5bGU9ImJvcmRlcjoycHggc29saWQgZ3JheSI+IDwvaHI+CgpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiAKClRyeSBleGVjdXRpbmcgdGhpcyBjaHVuayBieSBjbGlja2luZyB0aGUgKlJ1biogYnV0dG9uIHdpdGhpbiB0aGUgY2h1bmsgb3IgYnkgcGxhY2luZyB5b3VyIGN1cnNvciBpbnNpZGUgaXQgYW5kIHByZXNzaW5nICpDdHJsK1NoaWZ0K0VudGVyKi4gCgo8aHIgc3R5bGU9ImJvcmRlcjoycHggc29saWQgZ3JheSI+IDwvaHI+CgojIExvYWRpbmcgdGhlIGxpYnJhcmllcwoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCiMgRXJ5dGhyb215Y2luIHJlc2lzdGFuY2UKCiogRXJ5dGhyb215Y2luIHJlc2lzdGFuY2UgaW4gQ0MyMidzIGNvbmZlcnJlZCBieSAyNDczIGJwIHBsYXNtaWQgZW5jb2RpbmcgKnJlcEwqIGFuZCAqZXJtQyoKCk5DXzAxODk2OS50eHQgZnJvbSBodHRwczovL3d3dy5uY2JpLm5sbS5uaWguZ292L3B1Ym1lZC8yMDk0Mzg2NC8KCiogRmlyc3QgbGV0cyBsb2FkIHRoZSBvdmVyYWxsIGdlbm9tZSBjb3ZlcmFnZSBzdGF0aXN0aWNzIGZvciBlYWNoIGlzb2xhdGUKCmBgYHtyfQpjb3ZlcmFnZXMuTkNfMDE3NzYzIDwtIHJlYWRfdHN2KCdzdGF0cy5jb3ZlcmFnZS5OQ18wMTc3NjMudHh0Jywgc2hvd19jb2xfdHlwZXMgPSBGQUxTRSkKYGBgCgoqIEZpcnN0IGxldHMgbG9hZCB0aGUgcmVhZCBkZXB0aHMgZm9yIGVhY2ggaXNvbGF0ZSBtYXBwZWQgdG8gdGhlIHJlZmVyZW5jZSBwbGFzbWlkCgpgYGB7cn0KZGVwdGhzIDwtICdOQ18wMTg5NjkudHN2JwoKZGF0YSA8LSByZWFkX3RzdihmaWxlPWRlcHRocywgc2hvd19jb2xfdHlwZXMgPSBGQUxTRSkKZGF0YQpgYGAKCiogVGhpcyBkYXRhIGZpbGUgY29udGFpbnMgdGhlIG51bWJlciBvZiByZWFkcyBtYXBwaW5nIGF0IGVhY2ggZ2Vub21lIHBvc2l0aW9uIChQT1MpIG9mIHRoZSByZWZlcmVuY2UgKE5DXzAxODk2OS4xKS4gVGhlIGNvbHVtbnMgYXJlIGxhYmVsbGVkIGJ5IHRoZSBu
YW1lIG9mIHRoZSBhbGlnbm1lbnQgZmlsZSAoKi5iYW0pCiogTGV0cyByZW5hbWUgdGhlIGZpcnN0IGNvbHVtbiwgYXMgaXQgYmVnaW5zIHdpdGggYSAjIHdoaWNoIGNhbiBjYXVzZSBwcm9ibGVtcyB3aGVuIHNwZWNpZnlpbmcgY29sdW1ucwoKYGBge3J9CmRhdGEgJT4lCiAgcmVuYW1lKG5hbWU9JyNDSFJPTScpCmBgYAoqIExldHMgYWxzbyBwaXZpdCB0aGUgZGF0YSB0byBwcm9kdWNlIHRoZSBsb25nIGZvcm0gb2YgdGhlIHRhYmxlCgpgYGB7cn0KZGF0YSAlPiUKICByZW5hbWUobmFtZT0nI0NIUk9NJykgJT4lCiAgcGl2b3RfbG9uZ2VyKGNvbHM9YygtbmFtZSwgLVBPUyksIG5hbWVzX3RvID0gJ2lzb2xhdGUnLCB2YWx1ZXNfdG8gPSAnZGVwdGgnKQpgYGAKIAogKiBBbmQgd2UgbmVlZCB0byBleHRyYWN0IG91dCB0aGUgaXNvbGF0ZSBuYW1lIGZyb20gdGhlIGFsaWdubWVudCBmaWxlIHBhdGhzLCB3ZSBjYW4gZG8gdGhpcyB3aXRoIGEgY29tYmluYXRpb24gb2YgdGUgYG11dGF0ZSgpYCBhbmQgYHN0cl9yZXBsYWNlKClgIGZ1bmN0aW9ucwogKiB3ZSBjYW4gYWxzbyBzYXZlIHRoZSBkYXRhIGludG8gYSB2YXJpYWJsZSBjYWxsZWQgYGxvbmdfZGF0YWAKCmBgYHtyfQpsb25nX2RhdGEgPC0gZGF0YSAlPiUKICByZW5hbWUobmFtZT0nI0NIUk9NJykgJT4lCiAgcGl2b3RfbG9uZ2VyKGNvbHM9YygtbmFtZSwgLVBPUyksIG5hbWVzX3RvID0gJ2lzb2xhdGUnLCB2YWx1ZXNfdG8gPSAnZGVwdGgnKSAlPiUKICBtdXRhdGUoaXNvbGF0ZT1zdHJfcmVwbGFjZShpc29sYXRlLCAncmVzdWx0cy9hbGlnbm1lbnRzL05DXzAxODk2OV8oLispLmJhbScsICdcXDEnKSkKbG9uZ19kYXRhCmBgYAoKKiBJbiBvcmRlciB0byBwbG90IGZvciBhIHNpbmdsZSBpc29sYXRlIHdlIG5lZWQgdG8gZmlyc3QgZmlsdGVyIHRoZSBkYXRhLCB0aGVuIHBhc3MgdGhlIGRhdGEgaW50byBgZ2dwbG90KClgLiBIZXJlIHdlIHBsb3QgdGhlIHBsYXNtaWQgcG9zaXRpb24gKGBQT1NgKSBhZ2FpbnN0IHRoZSBudW1iZXIgb2YgcmVhZHMgY292ZXJpbmcgdGhhdCBwb3NpdGlvbiAoYGRlcHRoYCkgdXNpbmcgYGdlb21fbGluZSgpYCBzZXR0aW5nIHRoZSBsaW5lIHdpZHRoIHRvIDIKCmBgYHtyfQpsb25nX2RhdGEgJT4lCiAgZmlsdGVyKGlzb2xhdGUgPT0gJ1NFUTA0NScpICU+JQogIGdncGxvdChhZXMoeD1QT1MsIHk9ZGVwdGgpKSArIAogICAgZ2VvbV9saW5lKHNpemU9MikgCmBgYAoKKiBsZXRzIGFkZCB0d28gYXJyb3dzIHRvIHJlcHJzZW50IHRoZSBwb3NpdGlvbiBvZiB0aGUgZ2VuZXMgb24gdGhlIHBsYXNtaWQsIHdlIGp1c3QgcHJvdmUgeCx5IGNvb3JkcyBhbmQgc29tZSBzdHlsZSBhdHRyaWJ1dGVzCgpgYGB7cn0KbG9uZ19kYXRhICU+JQogIGZpbHRlcihpc29sYXRlID09ICdTRVEwNDUnKSAlPiUKICBnZ3Bsb3QoYWVzKHg9UE9TLCB5PWRlcHRoLCBncm91cD1pc29sYXRlKSkgKyAKICAgIGdlb21fbGluZShzaXplPTIpICsKICAgIGdlb21fc2VnbWVudChhZXMoeD0x
LHk9MTUwMCx4ZW5kPTQ3Nyx5ZW5kPTE1MDApLHNpemU9Myxjb2xvcj0nYmx1ZScsYXJyb3c9YXJyb3cobGVuZ3RoPXVuaXQoMC40LCJjbSIpKSkgKwogICAgZ2VvbV9zZWdtZW50KGFlcyh4PTEyOTIseT0xNTAwLHhlbmQ9MjAyNix5ZW5kPTE1MDApLHNpemU9Myxjb2xvcj0ncmVkJyxhcnJvdz1hcnJvdyhsZW5ndGg9dW5pdCgwLjQsImNtIikpKSAKYGBgCgoqIEZpbmFsbHkgd2UgY2FuIGNoYW5nZSB0aGUgYXhlcyBsYWJlbHMgd2l0aCBgbGFicygpYCBhbmQgc2V0IHRoZSB0aGVtZSB0byBgdGhlbWVfYncoKWAKCmBgYHtyfQpsb25nX2RhdGEgJT4lCiAgZmlsdGVyKGlzb2xhdGUgPT0gJ1NFUTA0NScpICU+JQogIGdncGxvdChhZXMoeD1QT1MsIHk9ZGVwdGgsIGdyb3VwPWlzb2xhdGUpKSArIAogICAgZ2VvbV9saW5lKHNpemU9MikgKwogICAgbGFicyh4PSdMbGFHMSBnZW5lIHBvc2l0aW9uJywgeT0nU2VxdWVuY2UgZGVwdGgnKSArCiAgICBnZW9tX3NlZ21lbnQoYWVzKHg9MSx5PTE1MDAseGVuZD00NzcseWVuZD0xNTAwKSxzaXplPTMsY29sb3I9J2JsdWUnLGFycm93PWFycm93KGxlbmd0aD11bml0KDAuNCwiY20iKSkpICsKICAgIGdlb21fc2VnbWVudChhZXMoeD0xMjkyLHk9MTUwMCx4ZW5kPTIwMjYseWVuZD0xNTAwKSxzaXplPTMsY29sb3I9J3JlZCcsYXJyb3c9YXJyb3cobGVuZ3RoPXVuaXQoMC40LCJjbSIpKSkgKwogICAgdGhlbWVfYncoKQpgYGAKCiogTm90ZSB0aGF0IGl0IGlzIHNvbWV0aW1lcyB1c2VmdWwgdG8gcGxvdCB0aGUgeSBheGlzIHdpdGggYSBsb2cgc2NhbGUsIHVzaW5nIHRoZSBmdW5jdGlvbiBgc2NhbGVfeV9sb2cxMCgpYAoKYGBge3J9CmxvbmdfZGF0YSAlPiUKICBmaWx0ZXIoaXNvbGF0ZSA9PSAnU0VRMDQ1JykgJT4lCiAgZ2dwbG90KGFlcyh4PVBPUywgeT1kZXB0aCwgZ3JvdXA9aXNvbGF0ZSkpICsgCiAgICBnZW9tX2xpbmUoc2l6ZT0yKSArCiAgICBzY2FsZV95X2xvZzEwKCkgKwogICAgbGFicyh4PSdMbGFHMSBnZW5lIHBvc2l0aW9uJywgeT0nU2VxdWVuY2UgZGVwdGgnKSArCiAgICBnZW9tX3NlZ21lbnQoYWVzKHg9MSx5PTE1MDAseGVuZD00NzcseWVuZD0xNTAwKSxzaXplPTMsY29sb3I9J2JsdWUnLGFycm93PWFycm93KGxlbmd0aD11bml0KDAuNCwiY20iKSkpICsKICAgIGdlb21fc2VnbWVudChhZXMoeD0xMjkyLHk9MTUwMCx4ZW5kPTIwMjYseWVuZD0xNTAwKSxzaXplPTMsY29sb3I9J3JlZCcsYXJyb3c9YXJyb3cobGVuZ3RoPXVuaXQoMC40LCJjbSIpKSkgKwogICAgdGhlbWVfYncoKQpgYGAKKiBpZiB3ZSB3YW50IHRvIHBsb3QgdGhlIHNhbWUgZ3JwaCBmb3IgYWxsIGlzb2xhdGVzIHdlIGNhbiB1c2UgYGZhY2V0X3dyYXAoKWAuIG5vdCB0aGUgcmVtb3ZhbCBvZiB0aGUgYGZpbHRlcigpYCBmdW5jdGlvbiBhbmQgdGhlIGluY2x1c2lvbiBvZiB0aGUgYGdyb3VwPWlzb2xhdGVgIGluIHRoZSBgZ2dwbG90KClgIGZ1bmN0aW9uLCB0aGVuIHdlIGFwcGx5IGBmYWNldF93cmFwKH5pc29sYXRl
KWAgdG8gY29tcGxldGUgdGhlIHByb2Nlc3MKCmBgYHtyIGZpZy5oZWlnaHQ9MjAsIGZpZy53aWR0aD0yMH0KbG9uZ19kYXRhICU+JQogIGdncGxvdChhZXMoeD1QT1MsIHk9ZGVwdGgsIGdyb3VwPWlzb2xhdGUpKSArIAogICAgZ2VvbV9obGluZSh5aW50ZXJjZXB0PTAsIGNvbG9yPSJyZWQiLCBzaXplPTIpICsgCiAgICBnZW9tX2xpbmUoc2l6ZT0yKSArCiMgICAgc2NhbGVfeV9sb2cxMCgpICsKICAgIGxhYnMoeD0nTGxhRzEgZ2VuZSBwb3NpdGlvbicsIHk9J1NlcXVlbmNlIGRlcHRoJykgKwogICAgZ2VvbV9zZWdtZW50KGFlcyh4PTEseT01MDAwLHhlbmQ9NDc3LHllbmQ9NTAwMCksc2l6ZT0zLGNvbG9yPSdibHVlJyxhcnJvdz1hcnJvdyhsZW5ndGg9dW5pdCgwLjQsImNtIikpKSArCiAgICBnZW9tX3NlZ21lbnQoYWVzKHg9MTI5Mix5PTUwMDAseGVuZD0yMDI2LHllbmQ9NTAwMCksc2l6ZT0zLGNvbG9yPSdyZWQnLGFycm93PWFycm93KGxlbmd0aD11bml0KDAuNCwiY20iKSkpICsKICAgIGdlb21faGxpbmUoZGF0YSA9IGNvdmVyYWdlcy5OQ18wMTc3NjMsIGFlcyh5aW50ZXJjZXB0ID0gY292ZXJhZ2UpLCBsaW5ldHlwZT0iZGFzaGVkIiwgY29sb3I9ImRhcmtncmVlbiIsIHNpemU9MSkgKwogICAgdGhlbWVfYncoKSArCiAgICBmYWNldF93cmFwKH5pc29sYXRlKQpgYGAKCiogV2UgY2FuIHNhdmUgdGhhdCBmaW5hbCBmaWd1cmUgdG8gYSBmaWxlIHdpdGggYGdnc2F2ZSgpYAoKYGBge3J9Cmdnc2F2ZSgnTkNfMDE4OTY5LW1hcHBpbmcucG5nJywgaGVpZ2h0PTIwLCB3aWR0aD0yMCkKYGBgCgojIEV4ZXJjaXNlCgoqIGxvYWQgdGhlIGRhdGFzZXQgc3RvcmVkIGluIGAvZGF0YS5jc3ZgLCBwbG90IGEgYmFyIGNoYXJ0IG9mIGBjb3VudHlgIGFnYWluc3QgYHBvcGFkdWx0c2AsIGJ1dCBwbG90IGZvciBhbGwgc3RhdGVzCgoKYGBge3J9CmRhdGEgPC0gcmVhZF9jc3YoJ2RhdGEuY3N2JykKCmRhdGEgJT4lCiAgZ2dwbG90KGFlcyh4PWNvdW50eSwgeT1wb3BhZHVsdHMsIGdyb3VwPXN0YXRlLCBjb2xvcj1jYXRlZ29yeSkpICsKICAgIGdlb21fcG9pbnQoKSArCiAgICBzY2FsZV95X2xvZzEwKGxhYmVscz1zY2FsZXM6OmNvbW1hKSArCiAgICBnZW9tX3Ntb290aCgpICsKICAgIGZhY2V0X3dyYXAofnN0YXRlLCBuY29sID0gNSkKCmRhdGEKYGBgCgoqIHNldCB0aGUgeS1heGVzIHRvIHVzZSBhIGxvZyBzY2FsZQoqIGNoYW5nZSB0aGUgeS1heGVzIGxhYmVscyB0byB1c2UgY29tbWFzIGluIHRoZSBudW1iZXJzCiogYWRkIGEgcmVncmVzc29uIGxpbmUKKiBjb2xvdXIgdGhlIHBvaW50cyBieSB0aGUgYGNhdGVnb3J5YCBjb2x1bW4KKiBpbmNsdWRlIGFsbCB0aGUgc3VicGxvdHMgaW4gb25lIHJvdy4gSGludDogY2hlY2sgYD9mYWNldF93cmFwYAoKCjxociBzdHlsZT0iYm9yZGVyOjJweCBzb2xpZCBncmF5Ij4gPC9ocj4KCiMgU2Vzc2lvbiBkZXRhaWxzCgoqIEdlbmVyYXRlIGRvY3VtZW50IHZlcnNpb24gZGV0YWlscwoKYGBge3J9CnNlc3Np
b25JbmZvKCkKYGBgCg==